Empower Smart Home owners with greater understanding and control of their power usage. Reach the developer’s goal of offering highly efficient Smart Homes that providing owners with power usage analytics. Provide accurate energy monitoring and forecasting for the utility company. Dataset Information
The data was gathered in a house located in Sceaux, France. It contains the measurements of electric power consumption from 3 sub-meters in the household with a one-minute sampling rate over the period of almost 4 years. We collected the data from UC Irvine machine learning repository. It was stored on Amazon Web Service with a password to access for security. Sub-meters Information Sub-metering 1, Kitchen(a dishwasher, an oven and a microwave). Sub-metering 2, Laundry room(a washing-machine, a tumble-drier, a refrigerator and a light). Sub-metering 3, Electric water-heater and an air-conditioner. Data Visualization and Analysis Process Load libraies and connect to the dataset
The data was stored on Amazon Web Service, we fist made a connection to connect the database. Then we used SQL to query all the tables we need within the database. We examined the structure of all the data frames, then combined all the tables into one dataframe using dplyr package.
## load the Libraries
library(RMySQL)
## Loading required package: DBI
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
library(tibbletime)
##
## Attaching package: 'tibbletime'
## The following object is masked from 'package:stats':
##
## filter
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
# Create a database connection
con = dbConnect(MySQL(), user='deepAnalytics', password='Sqltask1234!',
dbname='dataanalytics2018',
host='data-analytics-2018.cbrosir2cswx.us-east-1.rds.amazonaws.com')
## List the tables contained in the database
dbListTables(con)
## [1] "iris" "yr_2006" "yr_2007" "yr_2008" "yr_2009" "yr_2010"
## Use the dbGetQuery function to download tables and select Date, Time and the 3 sub-meter attributes
yr_2006 <- dbGetQuery(con, "SELECT Date, Time, Sub_metering_1, Sub_metering_2,
Sub_metering_3 FROM yr_2006")
yr_2007 <- dbGetQuery(con, "SELECT Date, Time, Sub_metering_1, Sub_metering_2,
Sub_metering_3 FROM yr_2007")
yr_2008 <- dbGetQuery(con, "SELECT Date, Time, Sub_metering_1, Sub_metering_2,
Sub_metering_3 FROM yr_2008")
yr_2009 <- dbGetQuery(con, "SELECT Date, Time, Sub_metering_1, Sub_metering_2,
Sub_metering_3 FROM yr_2009")
yr_2010 <- dbGetQuery(con, "SELECT Date, Time, Sub_metering_1, Sub_metering_2,
Sub_metering_3 FROM yr_2010")
## Insvestigate each new data frame
str(yr_2006)
## 'data.frame': 21992 obs. of 5 variables:
## $ Date : chr "2006-12-16" "2006-12-16" "2006-12-16" "2006-12-16" ...
## $ Time : chr "17:24:00" "17:25:00" "17:26:00" "17:27:00" ...
## $ Sub_metering_1: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_2: num 1 1 2 1 1 2 1 1 1 2 ...
## $ Sub_metering_3: num 17 16 17 17 17 17 17 17 17 16 ...
str(yr_2007)
## 'data.frame': 521669 obs. of 5 variables:
## $ Date : chr "2007-01-01" "2007-01-01" "2007-01-01" "2007-01-01" ...
## $ Time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
## $ Sub_metering_1: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_3: num 0 0 0 0 0 0 0 0 0 0 ...
str(yr_2008)
## 'data.frame': 526905 obs. of 5 variables:
## $ Date : chr "2008-01-01" "2008-01-01" "2008-01-01" "2008-01-01" ...
## $ Time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
## $ Sub_metering_1: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_3: num 18 18 18 18 18 17 18 18 18 18 ...
str(yr_2009)
## 'data.frame': 521320 obs. of 5 variables:
## $ Date : chr "2009-01-01" "2009-01-01" "2009-01-01" "2009-01-01" ...
## $ Time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
## $ Sub_metering_1: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_3: num 0 0 0 0 0 0 0 0 0 0 ...
str(yr_2010)
## 'data.frame': 457394 obs. of 5 variables:
## $ Date : chr "2010-01-01" "2010-01-01" "2010-01-01" "2010-01-01" ...
## $ Time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
## $ Sub_metering_1: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_3: num 18 18 19 18 18 19 18 18 19 18 ...
summary(yr_2007)
## Date Time Sub_metering_1 Sub_metering_2
## Length:521669 Length:521669 Min. : 0.000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.000 Median : 0.000
## Mean : 1.232 Mean : 1.638
## 3rd Qu.: 0.000 3rd Qu.: 1.000
## Max. :78.000 Max. :78.000
## Sub_metering_3
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 5.795
## 3rd Qu.:17.000
## Max. :20.000
summary(yr_2008)
## Date Time Sub_metering_1 Sub_metering_2
## Length:526905 Length:526905 Min. : 0.00 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.00 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.00 Median : 0.000
## Mean : 1.11 Mean : 1.256
## 3rd Qu.: 0.00 3rd Qu.: 1.000
## Max. :80.00 Max. :76.000
## Sub_metering_3
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 1.000
## Mean : 6.034
## 3rd Qu.:17.000
## Max. :31.000
summary(yr_2009)
## Date Time Sub_metering_1 Sub_metering_2
## Length:521320 Length:521320 Min. : 0.000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.000 Median : 0.000
## Mean : 1.137 Mean : 1.136
## 3rd Qu.: 0.000 3rd Qu.: 1.000
## Max. :82.000 Max. :77.000
## Sub_metering_3
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 1.000
## Mean : 6.823
## 3rd Qu.:18.000
## Max. :31.000
summary(yr_2010)
## Date Time Sub_metering_1 Sub_metering_2
## Length:457394 Length:457394 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.9875 Mean : 1.102
## 3rd Qu.: 0.0000 3rd Qu.: 1.000
## Max. :88.0000 Max. :80.000
## Sub_metering_3
## Min. : 0.000
## 1st Qu.: 1.000
## Median : 1.000
## Mean : 7.244
## 3rd Qu.:18.000
## Max. :31.000
## Create the Date Set
df <- bind_rows(yr_2007, yr_2008, yr_2009)
str(df)
## 'data.frame': 1569894 obs. of 5 variables:
## $ Date : chr "2007-01-01" "2007-01-01" "2007-01-01" "2007-01-01" ...
## $ Time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
## $ Sub_metering_1: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sub_metering_3: num 0 0 0 0 0 0 0 0 0 0 ...
summary(df)
## Date Time Sub_metering_1 Sub_metering_2
## Length:1569894 Length:1569894 Min. : 0.000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.000 Median : 0.000
## Mean : 1.159 Mean : 1.343
## 3rd Qu.: 0.000 3rd Qu.: 1.000
## Max. :82.000 Max. :78.000
## Sub_metering_3
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 1.000
## Mean : 6.216
## 3rd Qu.:17.000
## Max. :31.000
head(df)
## Date Time Sub_metering_1 Sub_metering_2 Sub_metering_3
## 1 2007-01-01 00:00:00 0 0 0
## 2 2007-01-01 00:01:00 0 0 0
## 3 2007-01-01 00:02:00 0 0 0
## 4 2007-01-01 00:03:00 0 0 0
## 5 2007-01-01 00:04:00 0 0 0
## 6 2007-01-01 00:05:00 0 0 0
## Combine Date and Time attribute values in a new attribute column
df <-cbind(df,paste(df$Date,df$Time), stringsAsFactors=FALSE)
colnames(df)[6]
## [1] "paste(df$Date, df$Time)"
## Give the new attribute a header name
colnames(df)[6] <-"DateTime"
df$DateTime <- as.POSIXct(df$DateTime, "%Y/%m/%d %H:%M:%S")
## Warning in strptime(xx, f, tz = tz): unknown timezone '%Y/%m/%d %H:%M:%S'
## Warning in as.POSIXct.POSIXlt(x): unknown timezone '%Y/%m/%d %H:%M:%S'
## Warning in strptime(x, f, tz = tz): unknown timezone '%Y/%m/%d %H:%M:%S'
## Warning in as.POSIXct.POSIXlt(as.POSIXlt(x, tz, ...), tz, ...): unknown timezone
## '%Y/%m/%d %H:%M:%S'
## Add the time zone
attr(df$DateTime, "tzone") <- "Europe/Paris"
## convert the dataframe to tibbletime
df <- as_tbl_time(df, index = DateTime)
head(df)
## # A time tibble: 6 × 6
## # Index: DateTime
## Date Time Sub_metering_1 Sub_metering_2 Sub_metering_3 DateTime
## <chr> <chr> <dbl> <dbl> <dbl> <dttm>
## 1 2007-0… 00:0… 0 0 0 2007-01-01 01:00:00
## 2 2007-0… 00:0… 0 0 0 2007-01-01 01:01:00
## 3 2007-0… 00:0… 0 0 0 2007-01-01 01:02:00
## 4 2007-0… 00:0… 0 0 0 2007-01-01 01:03:00
## 5 2007-0… 00:0… 0 0 0 2007-01-01 01:04:00
## 6 2007-0… 00:0… 0 0 0 2007-01-01 01:05:00
## Create "year" attribute with lubridale
df$year <- year(df$DateTime)
## quarter, month, week, weekday, day, hour and minute
df$quarter <- quarter(df$DateTime)
df$month <- month(df$DateTime)
df$week <- week(df$DateTime)
df$weekday <- weekdays(df$DateTime)
df$day <- day(df$DateTime)
df$hour <- hour(df$DateTime)
df$minute <- minute(df$DateTime)
#Initial exploration
summary(df)
## Date Time Sub_metering_1 Sub_metering_2
## Length:1569894 Length:1569894 Min. : 0.000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.000 Median : 0.000
## Mean : 1.159 Mean : 1.343
## 3rd Qu.: 0.000 3rd Qu.: 1.000
## Max. :82.000 Max. :78.000
## Sub_metering_3 DateTime year quarter
## Min. : 0.000 Min. :2007-01-01 01:00:00 Min. :2007 Min. :1.00
## 1st Qu.: 0.000 1st Qu.:2007-10-03 08:39:15 1st Qu.:2007 1st Qu.:2.00
## Median : 1.000 Median :2008-07-01 22:05:30 Median :2008 Median :3.00
## Mean : 6.216 Mean :2008-07-02 03:54:14 Mean :2008 Mean :2.51
## 3rd Qu.:17.000 3rd Qu.:2009-03-31 14:32:45 3rd Qu.:2009 3rd Qu.:4.00
## Max. :31.000 Max. :2010-01-01 00:59:00 Max. :2010 Max. :4.00
## month week weekday day
## Min. : 1.000 Min. : 1.00 Length:1569894 Min. : 1.00
## 1st Qu.: 4.000 1st Qu.:13.00 Class :character 1st Qu.: 8.00
## Median : 7.000 Median :27.00 Mode :character Median :16.00
## Mean : 6.529 Mean :26.62 Mean :15.71
## 3rd Qu.:10.000 3rd Qu.:40.00 3rd Qu.:23.00
## Max. :12.000 Max. :53.00 Max. :31.00
## hour minute
## Min. : 0.0 Min. : 0.00
## 1st Qu.: 5.0 1st Qu.:14.25
## Median :12.0 Median :30.00
## Mean :11.5 Mean :29.50
## 3rd Qu.:18.0 3rd Qu.:44.00
## Max. :23.0 Max. :59.00
sd(df$Sub_metering_1)
## [1] 6.288272
sd(df$Sub_metering_2)
## [1] 5.972199
sd(df$Sub_metering_3)
## [1] 8.341281
mode(df$Sub_metering_1)
## [1] "numeric"
sum(df$Sub_metering_1)
## [1] 1819989
sum(df$Sub_metering_2)
## [1] 2108410
sum(df$Sub_metering_3)
## [1] 9758843
-One minute sampling rate is too frequent for our analysis. Thus, first we need to adjust granularity(frequency) to maximize the information to be gained. - For this project, we reduce the granularity into every 10 minutes, 30 minutes, 40 minutes and 1 hour. -We plotted multiple power consumption plots by the different ways we filter the data, we wanted to see if we can get any useful insights from them.
## Reduce the granularity to every 30 minutes
df_30min <- as_period(df, '30 minute')
## Subset the second week of 2008
houseWeek <- filter(df, year == 2008, week == 2)
## Plot subset houseWeek
plot(houseWeek$Sub_metering_1)
## First, we plotted the consumption of sub-meter 1 for second week of 2008.
## Power consumption appears to peek during the weekend.
## Subset the 9th day of January 2008 - All observations
houseDay <- filter(df, year == 2008 & month == 1 & day == 9)
## Plot sub-meter 1, 2 and 3 with title, legend and labels - All observations
plot_ly(houseDay, x = ~houseDay$DateTime, y = ~houseDay$Sub_metering_1, name = 'Kitchen', type = 'scatter', mode = 'lines') %>%
add_trace(y = ~houseDay$Sub_metering_2, name = 'Laundry Room', mode = 'lines') %>%
add_trace(y = ~houseDay$Sub_metering_3, name = 'Water Heater & AC', mode = 'lines') %>%
layout(title = "Power Consumption January 9th, 2008",
xaxis = list(title = "Time"),
yaxis = list (title = "Power (watt-hours)"))
# Subset week 2 from year 2008 with 30 Minutes frequency
week2_30min_fr <- filter(df, year == 2008 & month == 1 & week == 2 & (minute == 0 | minute == 30 ))
## Plot sub-meter 1, 2 and 3 with title, legend and labels - 30 Minute frequency
plot_ly(week2_30min_fr, x = ~week2_30min_fr$DateTime, y = ~week2_30min_fr$Sub_metering_1, name = 'Kitchen',
type = 'scatter', mode = 'lines') %>%
add_trace(y = ~week2_30min_fr$Sub_metering_2, name = 'Laundry Room', mode = 'lines') %>%
add_trace(y = ~week2_30min_fr$Sub_metering_3, name = 'Water Heater & AC', mode = 'lines') %>%
layout(title = "Power Consumption of Week 2, 2008",
xaxis = list(title = "Time in every 30 minutes"),
yaxis = list (title = "Power (watt-hours)"))
## Choose monthly time period and use 60 mins interval
df_1hr <- as_period(df, "1 h")
df_1hr
## # A time tibble: 26,174 × 14
## # Index: DateTime
## Date Time Sub_metering_1 Sub_metering_2 Sub_metering_3 DateTime
## <chr> <chr> <dbl> <dbl> <dbl> <dttm>
## 1 2007… 00:0… 0 0 0 2007-01-01 01:00:00
## 2 2007… 01:0… 0 0 0 2007-01-01 02:00:00
## 3 2007… 02:0… 0 0 0 2007-01-01 03:00:00
## 4 2007… 03:0… 0 2 0 2007-01-01 04:00:00
## 5 2007… 04:0… 0 0 0 2007-01-01 05:00:00
## 6 2007… 05:0… 0 0 0 2007-01-01 06:00:00
## 7 2007… 06:0… 0 0 0 2007-01-01 07:00:00
## 8 2007… 07:0… 0 0 0 2007-01-01 08:00:00
## 9 2007… 08:0… 0 1 0 2007-01-01 09:00:00
## 10 2007… 09:0… 0 0 0 2007-01-01 10:00:00
## # … with 26,164 more rows, and 8 more variables: year <dbl>, quarter <int>,
## # month <dbl>, week <dbl>, weekday <chr>, day <int>, hour <int>, minute <int>
housemonth1hr <- filter(df_1hr, year == 2008 , month ==1)
plot_ly(housemonth1hr, x = ~housemonth1hr$DateTime, y = ~housemonth1hr$Sub_metering_1, name = 'Kitchen',
type = 'scatter', mode = 'lines') %>%
add_trace(y = ~housemonth1hr$Sub_metering_2, name = 'Laundry Room', mode = 'lines') %>%
add_trace(y = ~housemonth1hr$Sub_metering_3, name = 'Water Heater & AC', mode = 'lines') %>%
layout(title = "Power Consumption for Jan,2008",
xaxis = list(title = "Time"),
yaxis = list (title = "Power (watt-hours)"))
# Subset to one observation per week on Mondays at 8:00pm for 2007, 2008 and 2009
house070809weekly <- filter(df, day == "2" & hour == 20 & minute == 0)
# Create TS object with SubMeter3
tsSM3_070809weekly <- ts(house070809weekly$Sub_metering_3, frequency=52,
start=c(2007,1), end = c(2009,1))
# Plot sub-meter 3 with plot.ts
plot.ts(tsSM3_070809weekly, xlab = "Watt Hours", main = "Sub-meter 3, Mondays, 20:00", col="red")
- This Time-Series shows sub-meter 3 (Water Heater and A/C) at 8pm on
every Monday from 2007 to 2009
## Create a TS plot for Sub-meter 3 on Saturdays at 20:00
house070809weeklySat <- filter(df, weekday == 'Saturday' & hour == 20 & minute == 0)
house070809weeklySat
## # A time tibble: 153 × 14
## # Index: DateTime
## Date Time Sub_metering_1 Sub_metering_2 Sub_metering_3 DateTime
## <chr> <chr> <dbl> <dbl> <dbl> <dttm>
## 1 2007… 19:0… 0 0 0 2007-01-06 20:00:00
## 2 2007… 19:0… 0 0 0 2007-01-13 20:00:00
## 3 2007… 19:0… 0 35 16 2007-01-20 20:00:00
## 4 2007… 19:0… 0 0 0 2007-01-27 20:00:00
## 5 2007… 19:0… 35 34 16 2007-02-03 20:00:00
## 6 2007… 19:0… 0 0 17 2007-02-10 20:00:00
## 7 2007… 19:0… 0 0 18 2007-02-17 20:00:00
## 8 2007… 19:0… 0 0 0 2007-02-24 20:00:00
## 9 2007… 19:0… 0 1 0 2007-03-03 20:00:00
## 10 2007… 19:0… 0 1 0 2007-03-10 20:00:00
## # … with 143 more rows, and 8 more variables: year <dbl>, quarter <int>,
## # month <dbl>, week <dbl>, weekday <chr>, day <int>, hour <int>, minute <int>
tsSM3_070809weeklySat <- ts(house070809weeklySat$Sub_metering_3,
frequency=52, start=c(2007,1))
tsSM3_070809weeklySat
## Time Series:
## Start = c(2007, 1)
## End = c(2009, 49)
## Frequency = 52
## [1] 0 0 16 0 16 17 18 0 0 0 16 16 18 0 0 0 16 0 17 0 16 0 18 17 0
## [26] 0 0 17 0 0 0 0 18 18 0 0 0 6 18 18 0 0 0 17 18 17 17 17 0 17
## [51] 16 16 17 17 16 0 16 16 0 17 0 17 17 18 18 18 0 1 19 1 19 18 18 0 18
## [76] 10 18 1 11 30 0 1 1 1 12 1 0 1 18 1 0 18 17 0 0 17 0 17 0 0
## [101] 18 0 0 17 17 0 17 17 17 0 0 17 17 17 18 18 18 1 18 1 1 18 1 18 0
## [126] 0 30 0 1 1 1 1 19 18 0 1 0 19 18 30 18 1 18 1 29 18 18 17 19 18
## [151] 0 0 19
plot.ts(tsSM3_070809weekly, xlab = "Time", ylab = "Watt Hours",
main = "Sub-meter 3, Saturdays, 20:00 ")
## Create a TS plot for Sub-meter 1 on Saturdays at 20:00
house070809weeklySat20 <- filter(df, weekday == 'Saturday' & hour == 20 &
minute == 0)
tsSM1_070809weeklySat20 <- ts(house070809weeklySat20$Sub_metering_1,
frequency = 52, start = c(2007,1))
plot.ts(tsSM1_070809weeklySat20, xlab = "Time", ylab = "Watt Hours",
main = "Sub-meter 1, Saturdays, 20:00 ")
## Create a TS plot for Sub-meter 2 in May, 2007, 2008, 2009
house07Monthly <- filter(df, year == 2007 & month == 5 )
tsSM2_07Monthly <- ts(house07Monthly$Sub_metering_2,
frequency = 24)
plot.ts(tsSM2_07Monthly, xlab = "Time", ylab = "Watt Hours",
main = "Sub-meter 2, May, 2007")
# May, 2008
house08Monthly <- filter(df, year == 2008 & month == 5 )
tsSM2_08Monthly <- ts(house08Monthly$Sub_metering_2,
frequency = 24)
plot.ts(tsSM2_08Monthly, xlab = "Time", ylab = "Watt Hours",
main = "Sub-meter 2, May, 2008 ")
# May, 2009
house09Monthly <- filter(df, year == 2009 & month == 5 )
tsSM2_09Monthly <- ts(house09Monthly$Sub_metering_2,
frequency = 24)
plot.ts(tsSM2_09Monthly, xlab = "Time", ylab = "Watt Hours",
main = "Sub-meter 2, May, 2009 ")
# Apply time series linear regression to the sub-meter 3 ts object with trend and season properties
fitSM3 <- tslm(tsSM3_070809weekly ~ trend + season)
# Create the forecast for sub-meter 3. Forecast ahead 20 times periods and with confidence levels 80 and 90
forecastfitSM3c <- forecast(fitSM3, h=20, level=c(80,90))
# Forecast of Sub-meter 2 for the first week of June, 2007..
fitSM2 <- tslm(tsSM2_07Monthly ~ trend + season)
forecastfitSM2 <- forecast(fitSM2, h = 168, level = c(80, 90))
plot(forecastfitSM2, ylim = c(0, 40), ylab = "Watt-Hours", xlab = "Time",
main = "Sub-meter 2 Forecast, First Week of Jun, 2007")
# Decompose Sub-meter 3 into trend, seasonal and random
components070809SM3weekly <- decompose(tsSM3_070809weekly)
## Plot decomposed sub-meter 3
plot(components070809SM3weekly)
# Sub-meter 1 decomposition and the plot
components070809SM1weekly <- decompose(tsSM1_070809weeklySat20)
plot(components070809SM1weekly)
# Sub-meter 2 decomposition and the plot
components07SM2monthly <- decompose(tsSM2_07Monthly)
plot(components07SM2monthly)
Holt-Winters Forecasting Since our data has trend and seasonal components, here we are going to try anoter way to make time series forecasting.
# Seasonal adjusting sub-meter 3 by subtracting the seasonal component
tsSM3_070809Adjusted <- tsSM3_070809weekly - components070809SM3weekly$seasonal
# Test Seasonal Adjustment by running Decompose again
plot(decompose(tsSM3_070809Adjusted))
# Holt Winters Exponential Smoothing
tsSM3_HW070809 <- HoltWinters(tsSM3_070809weekly)
# Forecast HoltWinters with confidence levels
tsSM3_HW070809for <- forecast(tsSM3_HW070809, h=25, level=c(85,95))
# Plot only the forecasted area
plot(tsSM3_HW070809for, ylim = c(0, 30), ylab= "Watt-Hours", xlab="Time - Sub-meter 3",
start(2010), main = "Sub-meter 3 HWForecast, Mondays, 20:00")
- Sub-meter 3(Water Heater & AC) forecasting for the first 25 weeks
on Mondays at 20:00 for 2010.
# Sub_meter 1
tsSM1_070809Adjusted <- tsSM1_070809weeklySat20 - components070809SM1weekly$seasonal
tsSM1_HW070809 <- HoltWinters(tsSM1_070809Adjusted)
tsSM1_HW070809for <- forecast(tsSM1_HW070809, h=25, level=c(85,95))
plot(tsSM1_HW070809for, ylim = c(0, 60), ylab= "Watt-Hours", xlab="Time - Sub-meter 1",
start(2010), main = "Sub-meter 1 HWForecast, Saturdays, 20:00")
# Sub_meter 2
tsSM2_07Adjusted <- tsSM2_07Monthly - components07SM2monthly$seasonal
tsSM2_HW07 <- HoltWinters(tsSM2_07Adjusted)
tsSM2_HW07for <- forecast(tsSM2_HW07, h=24, level=c(85,95))
plot(tsSM2_HW07for, ylim = c(0, 20), ylab= "Watt-Hours", xlab="Time - Sub-meter 2",
start(30), main = "Sub-meter 2 HWForecast, May, 2007")